pacman::p_load(tmap, sf, sp, DT, stplanr, performance, reshape2, ggpubr, tidyverse)In-class Ex 10
Overview
In this exercise, we will walk through the steps of Spatial Interaction analysis and highlight key tips in Sections 1-4. This exercise follows a similar approach to Hands-on Ex 9A and 9B.
In Section 5, we will walk through the steps on using postal codes for geocoding.
1. Importing & Preparing Data
We will import and work with three datasets:
- Bus Commuters by Origin/Destination data from LTA DataMall
- Bus Stop Locations: Data on bus stop locations as of the last quarter of 2022.
- MPSZ-2019: Sub-zone boundary data from the URA Master Plan 2019.
1.1 Bus Commuters by Origin/Destination data
odbus <- read_csv("data/aspatial/origin_destination_bus_202210.csv")
odbus$ORIGIN_PT_CODE <- as.factor(odbus$ORIGIN_PT_CODE)
odbus$DESTINATION_PT_CODE <- as.factor(odbus$DESTINATION_PT_CODE) odbus6_9 <- odbus %>%
filter(DAY_TYPE == "WEEKDAY") %>%
filter(TIME_PER_HOUR >= 6 &
TIME_PER_HOUR <= 9) %>%
group_by(ORIGIN_PT_CODE,
DESTINATION_PT_CODE) %>%
summarise(TRIPS = sum(TOTAL_TRIPS))datatable(odbus6_9)1.2 Bus stop locations
busstop <- st_read(dsn = "data/geospatial",
layer = "BusStop") %>%
st_transform(crs = 3414)Reading layer `BusStop' from data source
`/Users/stephentay/stephentay/ISSS626-Geospatial-Analytics/In-class_Ex/In-class_Ex10/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 5159 features and 3 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: 3970.122 ymin: 26482.1 xmax: 48284.56 ymax: 52983.82
Projected CRS: SVY21
1.3 MPSZ-2019
mpsz <- st_read(dsn = "data/geospatial",
layer = "MPSZ-2019") %>%
st_transform(crs = 3414)Reading layer `MPSZ-2019' from data source
`/Users/stephentay/stephentay/ISSS626-Geospatial-Analytics/In-class_Ex/In-class_Ex10/data/geospatial'
using driver `ESRI Shapefile'
Simple feature collection with 332 features and 6 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 103.6057 ymin: 1.158699 xmax: 104.0885 ymax: 1.470775
Geodetic CRS: WGS 84
1.4 Retrieve planning subzone of each bus stop
When using st_intersection(), we overlay busstop on the planning subzone. The resulting output is still at the bus stop level.
busstop_mpsz <- st_intersection(busstop, mpsz) %>%
select(BUS_STOP_N, SUBZONE_C) %>%
st_drop_geometry()1.5 Combine origin/destination bus stops with mpsz
We retrieve the subzone id for the origin bus stops:
od_data <- left_join(odbus6_9 , busstop_mpsz,
by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
rename(ORIGIN_BS = ORIGIN_PT_CODE,
ORIGIN_SZ = SUBZONE_C,
DESTIN_BS = DESTINATION_PT_CODE)We can use the code below to check for duplicated records.
duplicate <- od_data %>%
group_by_all() %>%
filter(n()>1) %>%
ungroup()
duplicate# A tibble: 976 × 4
ORIGIN_BS DESTIN_BS TRIPS ORIGIN_SZ
<chr> <fct> <dbl> <chr>
1 22501 22009 3 JWSZ09
2 22501 22009 3 JWSZ09
3 22501 22451 85 JWSZ09
4 22501 22451 85 JWSZ09
5 22501 22469 18 JWSZ09
6 22501 22469 18 JWSZ09
7 22501 22479 29 JWSZ09
8 22501 22479 29 JWSZ09
9 22501 22509 3 JWSZ09
10 22501 22509 3 JWSZ09
# ℹ 966 more rows
If there are duplicated records, we use unique() to de-duplicate them.
od_data <- unique(od_data)We retrieve the subzone id for the destination bus stops:
od_data <- left_join(od_data , busstop_mpsz,
by = c("DESTIN_BS" = "BUS_STOP_N")) We can use the code below to check for duplicated records.
duplicate <- od_data %>%
group_by_all() %>%
filter(n()>1) %>%
ungroup()
duplicate# A tibble: 1,256 × 5
ORIGIN_BS DESTIN_BS TRIPS ORIGIN_SZ SUBZONE_C
<chr> <chr> <dbl> <chr> <chr>
1 1012 82221 1 <NA> GLSZ05
2 1012 82221 1 <NA> GLSZ05
3 1112 51071 49 <NA> CCSZ01
4 1112 51071 49 <NA> CCSZ01
5 1112 53041 4 <NA> BSSZ01
6 1112 53041 4 <NA> BSSZ01
7 1112 82221 1 <NA> GLSZ05
8 1112 82221 1 <NA> GLSZ05
9 1113 51071 2 <NA> CCSZ01
10 1113 51071 2 <NA> CCSZ01
# ℹ 1,246 more rows
If there are duplicated records, we use unique() to de-duplicate them.
od_data <- unique(od_data)In the final step, we remove rows with null values in either the origin or destination subzone, then aggregate the total number of trips for each origin-destination subzone pair.
od_data <- od_data %>%
rename(DESTIN_SZ = SUBZONE_C) %>%
drop_na() %>%
group_by(ORIGIN_SZ, DESTIN_SZ) %>%
summarise(MORNING_PEAK = sum(TRIPS))
glimpse(od_data)Rows: 14,734
Columns: 3
Groups: ORIGIN_SZ [279]
$ ORIGIN_SZ <chr> "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01…
$ DESTIN_SZ <chr> "AMSZ01", "AMSZ02", "AMSZ03", "AMSZ04", "AMSZ05", "AMSZ06…
$ MORNING_PEAK <dbl> 1998, 8289, 8971, 2252, 6136, 2148, 1620, 1925, 1773, 63,…
2. Visualising Spatial Interaction
2.1 Remove intra-zonal flows
This study excludes intra-zonal flows, so they are removed from the analysis.
od_data_fij <- od_data[od_data$ORIGIN_SZ != od_data$DESTIN_SZ,]2.2 Create inter-zonal desire lines
We use the od2line() function from the stplanr package to create inter-zonal desire lines.
flowLine <- od2line(flow = od_data_fij,
zones = mpsz,
zone_code = "SUBZONE_C")2.3 Visualise desire lines
The map below shows inter-zonal bus commuter flows on weekdays between 6:00 and 9:00 am.
tm_shape(mpsz) +
tm_polygons() +
flowLine %>%
tm_shape() +
tm_lines(lwd = "MORNING_PEAK",
style = "quantile",
scale = c(0.1, 1, 3, 5, 7, 10),
n = 6,
alpha = 0.5)
When inter-zonal bus commuter flows are complex or highly skewed, it can be effective to focus on selected flows, such as those with values greater than or equal to 5,000, as shown below.
tm_shape(mpsz) +
tm_polygons() +
flowLine %>%
filter(MORNING_PEAK >= 5000) %>%
tm_shape() +
tm_lines(lwd = "MORNING_PEAK",
style = "quantile",
scale = c(0.1, 1, 3, 5, 7, 10),
n = 6,
alpha = 0.5)
3. Data Wrangling and Preparation
3.1 Convert to sp dataframe
Before computing the distance matrix (i.e., distances between pairs of locations), we convert the sf dataframe to an sp dataframe. Although the distance matrix can be computed directly from an sf dataframe, the sp method is generally more time-efficient.
mpsz_sp <- as(mpsz, "Spatial")
mpsz_spclass : SpatialPolygonsDataFrame
features : 332
extent : 2667.538, 56396.44, 15748.72, 50256.33 (xmin, xmax, ymin, ymax)
crs : +proj=tmerc +lat_0=1.36666666666667 +lon_0=103.833333333333 +k=1 +x_0=28001.642 +y_0=38744.572 +ellps=WGS84 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
variables : 6
names : SUBZONE_N, SUBZONE_C, PLN_AREA_N, PLN_AREA_C, REGION_N, REGION_C
min values : ADMIRALTY, AMSZ01, ANG MO KIO, AM, CENTRAL REGION, CR
max values : YUNNAN, YSSZ09, YISHUN, YS, WEST REGION, WR
3.2 Compute distance matrix
We use the spDists() function from the sp package to compute the Euclidean distance between the centroids of the planning subzones.
dist <- spDists(mpsz_sp,
longlat = FALSE)
head(dist, n=c(10, 10)) [,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 0.000 3926.0025 3939.108 20252.964 2989.9839 1431.330 19211.836
[2,] 3926.003 0.0000 305.737 16513.865 951.8314 5254.066 16242.523
[3,] 3939.108 305.7370 0.000 16412.062 1045.9088 5299.849 16026.146
[4,] 20252.964 16513.8648 16412.062 0.000 17450.3044 21665.795 7229.017
[5,] 2989.984 951.8314 1045.909 17450.304 0.0000 4303.232 17020.916
[6,] 1431.330 5254.0664 5299.849 21665.795 4303.2323 0.000 20617.082
[7,] 19211.836 16242.5230 16026.146 7229.017 17020.9161 20617.082 0.000
[8,] 14960.942 12749.4101 12477.871 11284.279 13336.0421 16281.453 5606.082
[9,] 7515.256 7934.8082 7649.776 18427.503 7801.6163 8403.896 14810.930
[10,] 6391.342 4975.0021 4669.295 15469.566 5226.8731 7707.091 13111.391
[,8] [,9] [,10]
[1,] 14960.942 7515.256 6391.342
[2,] 12749.410 7934.808 4975.002
[3,] 12477.871 7649.776 4669.295
[4,] 11284.279 18427.503 15469.566
[5,] 13336.042 7801.616 5226.873
[6,] 16281.453 8403.896 7707.091
[7,] 5606.082 14810.930 13111.391
[8,] 0.000 9472.024 8575.490
[9,] 9472.024 0.000 3780.800
[10,] 8575.490 3780.800 0.000
3.3 Label row and column headers of distance matrix
Since the row and column headers of the distance matrix are unlabeled, we perform the following steps to add labels for the planning subzone codes:
- Create a list of planning subzone codes sorted to match the order of the distance matrix.
- Attach SUBZONE_C labels to the rows and columns of the distance matrix for alignment.
sz_names <- mpsz$SUBZONE_C
colnames(dist) <- paste0(sz_names)
rownames(dist) <- paste0(sz_names)
head(dist, n=c(5, 5)) MESZ01 RVSZ05 SRSZ01 WISZ01 MUSZ02
MESZ01 0.000 3926.0025 3939.108 20252.96 2989.9839
RVSZ05 3926.003 0.0000 305.737 16513.86 951.8314
SRSZ01 3939.108 305.7370 0.000 16412.06 1045.9088
WISZ01 20252.964 16513.8648 16412.062 0.00 17450.3044
MUSZ02 2989.984 951.8314 1045.909 17450.30 0.0000
3.4 Convert distance matrix to long table format
We use the melt() function to convert the distance matrix into a long-table format. Note that intra-zonal distances are 0.
distPair <- melt(dist) %>%
rename(dist = value)
head(distPair, 10) Var1 Var2 dist
1 MESZ01 MESZ01 0.000
2 RVSZ05 MESZ01 3926.003
3 SRSZ01 MESZ01 3939.108
4 WISZ01 MESZ01 20252.964
5 MUSZ02 MESZ01 2989.984
6 MPSZ05 MESZ01 1431.330
7 WISZ03 MESZ01 19211.836
8 WISZ02 MESZ01 14960.942
9 SISZ02 MESZ01 7515.256
10 SISZ01 MESZ01 6391.342
3.5 Update intra-zonal distance
We assign a small non-zero constant to replace intra-zonal distances of 0.
First, we find out the minimum inter-zonal distance using summary().
distPair %>%
filter(dist > 0) %>%
summary() Var1 Var2 dist
MESZ01 : 331 MESZ01 : 331 Min. : 173.8
RVSZ05 : 331 RVSZ05 : 331 1st Qu.: 7149.5
SRSZ01 : 331 SRSZ01 : 331 Median :11890.0
WISZ01 : 331 WISZ01 : 331 Mean :12229.4
MUSZ02 : 331 MUSZ02 : 331 3rd Qu.:16401.7
MPSZ05 : 331 MPSZ05 : 331 Max. :49894.4
(Other):107906 (Other):107906
We assign a constant of 50m to replace intra-zonal distances.
distPair$dist <- ifelse(distPair$dist == 0,
50, distPair$dist)
distPair %>%
summary() Var1 Var2 dist
MESZ01 : 332 MESZ01 : 332 Min. : 50
RVSZ05 : 332 RVSZ05 : 332 1st Qu.: 7097
SRSZ01 : 332 SRSZ01 : 332 Median :11864
WISZ01 : 332 WISZ01 : 332 Mean :12193
MUSZ02 : 332 MUSZ02 : 332 3rd Qu.:16388
MPSZ05 : 332 MPSZ05 : 332 Max. :49894
(Other):108232 (Other):108232
We rename the variables for clarity.
distPair <- distPair %>%
rename(orig = Var1,
dest = Var2)3.6 Prepare flow data
The OD matrix of bus commuter flow was computed in Hands-on Ex 9A. We will import the OD matrix for this exercise.
od_data_fii <- read_rds("data/rds/od_data_fii.rds")We compute the total commuter trips between and within planning subzones as follows:
flow_data <- od_data_fii %>%
group_by(ORIGIN_SZ, DESTIN_SZ) %>%
summarize(TRIPS = sum(MORNING_PEAK))
head(flow_data, 10)# A tibble: 10 × 3
# Groups: ORIGIN_SZ [1]
ORIGIN_SZ DESTIN_SZ TRIPS
<chr> <chr> <dbl>
1 AMSZ01 AMSZ01 1998
2 AMSZ01 AMSZ02 8289
3 AMSZ01 AMSZ03 8971
4 AMSZ01 AMSZ04 2252
5 AMSZ01 AMSZ05 6136
6 AMSZ01 AMSZ06 2148
7 AMSZ01 AMSZ07 1620
8 AMSZ01 AMSZ08 1925
9 AMSZ01 AMSZ09 1773
10 AMSZ01 AMSZ10 63
Two new fields specific to intra-zonal flows are added to the dataset.
flow_data$FlowNoIntra <- ifelse(
flow_data$ORIGIN_SZ == flow_data$DESTIN_SZ,
0, flow_data$TRIPS)
flow_data$offset <- ifelse(
flow_data$ORIGIN_SZ == flow_data$DESTIN_SZ,
0.000001, 1)
glimpse(flow_data)Rows: 14,734
Columns: 5
Groups: ORIGIN_SZ [279]
$ ORIGIN_SZ <chr> "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01"…
$ DESTIN_SZ <chr> "AMSZ01", "AMSZ02", "AMSZ03", "AMSZ04", "AMSZ05", "AMSZ06"…
$ TRIPS <dbl> 1998, 8289, 8971, 2252, 6136, 2148, 1620, 1925, 1773, 63, …
$ FlowNoIntra <dbl> 0, 8289, 8971, 2252, 6136, 2148, 1620, 1925, 1773, 63, 542…
$ offset <dbl> 1e-06, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e…
Before joining the flow data with the distance dataset, we need to convert the origin and destination subzones to factor data types.
flow_data$ORIGIN_SZ <- as.factor(flow_data$ORIGIN_SZ)
flow_data$DESTIN_SZ <- as.factor(flow_data$DESTIN_SZ)
flow_data1 <- flow_data %>%
left_join(distPair,
by = c("ORIGIN_SZ" = "orig", "DESTIN_SZ" = "dest"))
glimpse(flow_data1)Rows: 14,734
Columns: 6
Groups: ORIGIN_SZ [279]
$ ORIGIN_SZ <fct> AMSZ01, AMSZ01, AMSZ01, AMSZ01, AMSZ01, AMSZ01, AMSZ01, AM…
$ DESTIN_SZ <fct> AMSZ01, AMSZ02, AMSZ03, AMSZ04, AMSZ05, AMSZ06, AMSZ07, AM…
$ TRIPS <dbl> 1998, 8289, 8971, 2252, 6136, 2148, 1620, 1925, 1773, 63, …
$ FlowNoIntra <dbl> 0, 8289, 8971, 2252, 6136, 2148, 1620, 1925, 1773, 63, 542…
$ offset <dbl> 1e-06, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e…
$ dist <dbl> 50.0000, 810.4491, 1360.9294, 840.4432, 1076.7916, 805.297…
3.7 Preparation of Origin/Destination Population Data
1. Import sub-zone population data
pop <- read_csv("data/aspatial/pop.csv")
glimpse(pop)Rows: 332
Columns: 5
$ PA <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG …
$ SZ <chr> "ANG MO KIO TOWN CENTRE", "CHENG SAN", "CHONG BOON", "KEBUN B…
$ AGE7_12 <dbl> 310, 1140, 1010, 1050, 420, 810, 390, 980, 0, 260, 0, 1190, 6…
$ AGE13_24 <dbl> 710, 2770, 2650, 2390, 1120, 1920, 1150, 2000, 0, 650, 0, 326…
$ AGE25_64 <dbl> 2780, 15700, 14240, 12460, 3620, 9650, 4350, 11320, 0, 2500, …
2. Append subzone code
We left join pop dataset with mpsz dataset to retrieve the subzone codes.
pop <- pop %>%
left_join(mpsz,
by = c("PA" = "PLN_AREA_N",
"SZ" = "SUBZONE_N")) %>%
select(1:6) %>%
rename(SZ_NAME = SZ,
SZ = SUBZONE_C)
glimpse(pop)Rows: 332
Columns: 6
$ PA <chr> "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG MO KIO", "ANG …
$ SZ_NAME <chr> "ANG MO KIO TOWN CENTRE", "CHENG SAN", "CHONG BOON", "KEBUN B…
$ AGE7_12 <dbl> 310, 1140, 1010, 1050, 420, 810, 390, 980, 0, 260, 0, 1190, 6…
$ AGE13_24 <dbl> 710, 2770, 2650, 2390, 1120, 1920, 1150, 2000, 0, 650, 0, 326…
$ AGE25_64 <dbl> 2780, 15700, 14240, 12460, 3620, 9650, 4350, 11320, 0, 2500, …
$ SZ <chr> "AMSZ01", "AMSZ02", "AMSZ03", "AMSZ06", "AMSZ07", "AMSZ05", "…
3. Prepare origin attribute
We add origin attributes to the flow data.
flow_data1 <- flow_data1 %>%
left_join(pop,
by = c(ORIGIN_SZ = "SZ")) %>%
rename(ORIGIN_AGE7_12 = AGE7_12,
ORIGIN_AGE13_24 = AGE13_24,
ORIGIN_AGE25_64 = AGE25_64) %>%
select(-c(PA, SZ_NAME))4. Prepare destination attribute
We add destination attributes to the flow data.
SIM_data <- flow_data1 %>%
left_join(pop,
by = c(DESTIN_SZ = "SZ")) %>%
rename(DESTIN_AGE7_12 = AGE7_12,
DESTIN_AGE13_24 = AGE13_24,
DESTIN_AGE25_64 = AGE25_64) %>%
select(-c(PA, SZ_NAME))
glimpse(SIM_data)Rows: 14,734
Columns: 12
Groups: ORIGIN_SZ [279]
$ ORIGIN_SZ <chr> "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01", "AMSZ01", "AMS…
$ DESTIN_SZ <chr> "AMSZ01", "AMSZ02", "AMSZ03", "AMSZ04", "AMSZ05", "AMS…
$ TRIPS <dbl> 1998, 8289, 8971, 2252, 6136, 2148, 1620, 1925, 1773, …
$ FlowNoIntra <dbl> 0, 8289, 8971, 2252, 6136, 2148, 1620, 1925, 1773, 63,…
$ offset <dbl> 1e-06, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00, 1e+00…
$ dist <dbl> 50.0000, 810.4491, 1360.9294, 840.4432, 1076.7916, 805…
$ ORIGIN_AGE7_12 <dbl> 310, 310, 310, 310, 310, 310, 310, 310, 310, 310, 310,…
$ ORIGIN_AGE13_24 <dbl> 710, 710, 710, 710, 710, 710, 710, 710, 710, 710, 710,…
$ ORIGIN_AGE25_64 <dbl> 2780, 2780, 2780, 2780, 2780, 2780, 2780, 2780, 2780, …
$ DESTIN_AGE7_12 <dbl> 310, 1140, 1010, 980, 810, 1050, 420, 390, 1190, 0, 0,…
$ DESTIN_AGE13_24 <dbl> 710, 2770, 2650, 2000, 1920, 2390, 1120, 1150, 3260, 0…
$ DESTIN_AGE25_64 <dbl> 2780, 15700, 14240, 11320, 9650, 12460, 3620, 4350, 13…
5. Replace zero values with 0.99
We replace all zero values in the affected explanatory variables with 0.99.
SIM_data$DESTIN_AGE7_12 <- ifelse(
SIM_data$DESTIN_AGE7_12 == 0,
0.99, SIM_data$DESTIN_AGE7_12)
SIM_data$DESTIN_AGE13_24 <- ifelse(
SIM_data$DESTIN_AGE13_24 == 0,
0.99, SIM_data$DESTIN_AGE13_24)
SIM_data$DESTIN_AGE25_64 <- ifelse(
SIM_data$DESTIN_AGE25_64 == 0,
0.99, SIM_data$DESTIN_AGE25_64)
SIM_data$ORIGIN_AGE7_12 <- ifelse(
SIM_data$ORIGIN_AGE7_12 == 0,
0.99, SIM_data$ORIGIN_AGE7_12)
SIM_data$ORIGIN_AGE13_24 <- ifelse(
SIM_data$ORIGIN_AGE13_24 == 0,
0.99, SIM_data$ORIGIN_AGE13_24)
SIM_data$ORIGIN_AGE25_64 <- ifelse(
SIM_data$ORIGIN_AGE25_64 == 0,
0.99, SIM_data$ORIGIN_AGE25_64)summary(SIM_data) ORIGIN_SZ DESTIN_SZ TRIPS FlowNoIntra
Length:14734 Length:14734 Min. : 1 Min. : 0.0
Class :character Class :character 1st Qu.: 14 1st Qu.: 13.0
Mode :character Mode :character Median : 76 Median : 70.0
Mean : 1021 Mean : 839.9
3rd Qu.: 426 3rd Qu.: 379.0
Max. :232187 Max. :148274.0
offset dist ORIGIN_AGE7_12 ORIGIN_AGE13_24
Min. :0.000001 Min. : 50 Min. : 0.99 Min. : 0.99
1st Qu.:1.000000 1st Qu.: 3346 1st Qu.: 240.00 1st Qu.: 440.00
Median :1.000000 Median : 6067 Median : 700.00 Median : 1350.00
Mean :0.982150 Mean : 6880 Mean :1031.86 Mean : 2268.84
3rd Qu.:1.000000 3rd Qu.: 9729 3rd Qu.:1480.00 3rd Qu.: 3260.00
Max. :1.000000 Max. :26136 Max. :6340.00 Max. :16380.00
ORIGIN_AGE25_64 DESTIN_AGE7_12 DESTIN_AGE13_24 DESTIN_AGE25_64
Min. : 0.99 Min. : 0.99 Min. : 0.99 Min. : 0.99
1st Qu.: 2200.00 1st Qu.: 240.00 1st Qu.: 460.00 1st Qu.: 2200.00
Median : 6810.00 Median : 720.00 Median : 1420.00 Median : 7030.00
Mean :10487.62 Mean :1033.40 Mean : 2290.35 Mean :10574.46
3rd Qu.:15770.00 3rd Qu.:1500.00 3rd Qu.: 3260.00 3rd Qu.:15830.00
Max. :74610.00 Max. :6340.00 Max. :16380.00 Max. :74610.00
4. Unconstrained SIM
The model shows that there is a positive relationship (0.82) between origin age 13-24 with the flow. There is an inverse relationship between distance and the total trips (-0.686), which is expected.
uncSIM <- glm(formula = TRIPS ~
log(ORIGIN_AGE7_12) +
log(DESTIN_AGE7_12) +
log(ORIGIN_AGE13_24) +
log(DESTIN_AGE13_24) +
log(ORIGIN_AGE25_64) +
log(DESTIN_AGE25_64) +
log(dist),
family = poisson(link = "log"),
data = SIM_data,
na.action = na.exclude)
uncSIM
Call: glm(formula = TRIPS ~ log(ORIGIN_AGE7_12) + log(DESTIN_AGE7_12) +
log(ORIGIN_AGE13_24) + log(DESTIN_AGE13_24) + log(ORIGIN_AGE25_64) +
log(DESTIN_AGE25_64) + log(dist), family = poisson(link = "log"),
data = SIM_data, na.action = na.exclude)
Coefficients:
(Intercept) log(ORIGIN_AGE7_12) log(DESTIN_AGE7_12)
10.8165 0.2906 0.1211
log(ORIGIN_AGE13_24) log(DESTIN_AGE13_24) log(ORIGIN_AGE25_64)
0.8176 0.4314 -0.7111
log(DESTIN_AGE25_64) log(dist)
-0.4464 -0.6860
Degrees of Freedom: 14733 Total (i.e. Null); 14726 Residual
Null Deviance: 60800000
Residual Deviance: 34170000 AIC: 34260000
We use McFadden’s R-squared which is the metric for evaluating logistic, Poisson, and negative-binomial regression models.
r2_mcfadden(uncSIM)# R2 for Generalized Linear Regression
R2: 0.437
adj. R2: 0.437
5. Geocoding
We can also use postal code for geocoding. The code to do so is shown below.
pacman::p_load(tidyverse, sf, tmap, httr, performance)url <- "https://onemap.gov.sg/api/common/elastic/search"
found <- data.frame()
not_found <- data.frame()
for (postcode in postcode) {
query <- list('searchVal'=postcode, 'returnGeom'='Y',
'getAddrDetails'='Y', 'pageNum'='1')
res <- GET(url, query=query)
if ((content(res)$found) != 0){
found <- rbind(found, data.frame(content(res))[4:13])
} else {not_found = data.frame(postcode)
}
}